
library(rio)
library(ggplot2)
library(texreg)
library(tidyverse)
library(AER)
library(reshape2)
library(gridExtra)

##set the working directory for the replication archive
working_dir <- "~/Dropbox/dueling project/2nd_paper/replication archive/"

source(paste0(working_dir,"Auxiliary Scripts/RegressionRunner.R"))

##load in the main datasets
railroads <- readRDS(paste0(working_dir,"Data/rails_waterways.RDS"))
turnout <- readRDS(paste0(working_dir,"Data/voter_turnout_1824_1860.RDS"))

##we now need to figure out post offices during election years
el_yrs <- seq(1800, 1860, by = 4)
nearest_decade <- as.numeric(paste0(substring(el_yrs, 1, 3),"0"))
next_decade <- nearest_decade + 10

uniq_counties <- unique(railroads$countyfips)
master_frame <- data.frame("fips" = rep(uniq_counties, each = length(el_yrs)),
                           "year" = rep(el_yrs, length(uniq_counties)),
                           "numPost" = rep(NA, length(rep(el_yrs, length(uniq_counties)))),
                           "state" = rep(NA, length(rep(el_yrs, length(uniq_counties)))))

k <- 1
for (i in 1:length(uniq_counties)){
  tmp_post <- railroads %>% filter(countyfips == uniq_counties[i])
  for (j in 1:length(el_yrs)){
    if (sum(tmp_post$decade <= nearest_decade[j]) > 0){
      master_frame$numPost[k] <-
        tmp_post$numPost[tmp_post$decade == nearest_decade[j]] + 
        0.1*(el_yrs - nearest_decade)[j]*(tmp_post$numPost[tmp_post$decade == next_decade[j]] - 
                                            tmp_post$numPost[tmp_post$decade == nearest_decade[j]])
      master_frame$state[k] <- as.character(tmp_post$state_po)[1]
    }
    k <- k + 1
  }
}

##Now try to merge
all_together <- merge(turnout, master_frame %>% filter(!is.na(numPost)), by = c("fips", "year"))

##calculate lags and such
all_together <-
  all_together %>%
  arrange(fips, year) %>%
  group_by(fips) %>%
  mutate(postLag1 = lag(numPost), postLag2 = lag(numPost, 2), postLag3 = lag(numPost, 3),
         turnLag1 = lag(PresTurnout), turnLag2 = lag(PresTurnout, 2), turnLag3 = lag(PresTurnout, 3)) %>%
  ungroup()

all_together$malePop <- all_together$TotalVotes/(all_together$PresTurnout/100)

##now try to do predictions
post_pred1 <- lm(PresTurnout ~ log(1 + postLag1) + turnLag1 + log(malePop) + factor(year) + factor(state), data = all_together)
post_pred2 <- lm(PresTurnout ~ log(1 + postLag2) + turnLag1 + log(malePop) + factor(year) + factor(state), data = all_together)
post_pred3 <- lm(PresTurnout ~ log(1 + postLag3) + turnLag1 + log(malePop) + factor(year) + factor(state), data = all_together)

turn_pred1 <- lm(log(numPost + 1) ~  turnLag1 + log(postLag1 + 1) + log(malePop) + factor(year) + factor(state), data = all_together)
turn_pred2 <- lm(log(numPost + 1)  ~ turnLag2 + log(postLag1 + 1) + log(malePop) + factor(year) + factor(state), data = all_together)
turn_pred3 <- lm(log(numPost + 1)  ~ turnLag3 + log(postLag1 + 1) + log(malePop) + factor(state), data = all_together)

screenreg(list(post_pred1, post_pred2, post_pred3, turn_pred1, turn_pred2, turn_pred3), omit.coef = "factor")

##make a plot
regData <- function(m){
  summary(m)$coef[2,1:2]
}

all_res <-
  do.call(rbind, lapply(list(post_pred1, post_pred2, post_pred3, turn_pred1, turn_pred2, turn_pred3), regData))

all_res <- as.data.frame(all_res)

all_res$Outcome <- rep(c("Presidential Turnout", "Post Offices"), each = 3)
all_res$Lags <- rep(c("First Lag", "Secong Lag", "Third Lag"), 2)

plot1 <- 
  ggplot(all_res %>% filter(Outcome == "Presidential Turnout"), aes(Lags, Estimate)) +
  geom_point(size = 5, position=position_dodge(width=0.5)) +
  geom_errorbar(
    aes(ymin = Estimate - 1.96*`Std. Error`, ymax = Estimate + 1.96*`Std. Error`),
    width = 0.01,
    linetype = "solid",
    position=position_dodge(width=0.5)) +
  geom_hline(yintercept = 0, linetype = "dashed") + 
  ylab("Voter Turnout \n (% of White Male Eligible Voters)") +
  xlab("Coefficient on Number of Post Offices\n(logged)") +
  theme_bw() +
  theme(legend.title = element_text(size = 20),
        axis.text.x = element_text(size = 20), axis.text.y = element_text(size = 20),
        legend.text = element_text(size = 20),
        axis.title.x = element_text(size = 20),
        axis.title.y = element_text(size = 20),
        strip.text.x = element_text(size = 20)) 

plot2 <- 
  ggplot(all_res %>% filter(Outcome != "Presidential Turnout"), aes(Lags, Estimate)) +
  geom_point(size = 5, position=position_dodge(width=0.5)) +
  geom_errorbar(
    aes(ymin = Estimate - 1.96*`Std. Error`, ymax = Estimate + 1.96*`Std. Error`),
    width = 0.01,
    linetype = "solid",
    position=position_dodge(width=0.5)) +
  geom_hline(yintercept = 0, linetype = "dashed") + 
  xlab("Coefficient on Voter Turnout \n (% of White Male Eligible Voters,\nlagged)") +
  ylab("Number of Post Offices (logged)") +
  theme_bw() +
  theme(legend.title = element_text(size = 20),
        axis.text.x = element_text(size = 20), axis.text.y = element_text(size = 20),
        legend.text = element_text(size = 20),
        axis.title.x = element_text(size = 20),
        axis.title.y = element_text(size = 20),
        strip.text.x = element_text(size = 20)) 

big_plot <- grid.arrange(plot1,plot2)

ggsave(big_plot, 
       file = paste0(working_dir,"Replication Plots/turnout.pdf"), 
       width = 14, height = 11)


##now we try to do a long lag
all_together_rev <- merge(all_together, 
                          railroads %>% filter(decade == 1800) %>% mutate(post1800 = numPost, fips = countyfips) %>% select(fips, post1800))


